{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3.4 比较错误率\n",
    "选择两个UCI数据集，比较十折交叉验证法和留一法所估计出的对率回归的错误率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import time\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "from sklearn.model_selection import LeaveOneOut\n",
    "from sklearn import model_selection\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# (1) Blood Transfusion Service Center Data Set 二类\n",
    "# blood.txt Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),\"whether he/she donated blood in March 2007\"\n",
    "# (2) Iris 三类\n",
    "# iris.txt \n",
    "\n",
    "# 最后一列是标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def cal_error(data):\n",
    "    x = data[:,:-1]\n",
    "    y = data[:,-1]\n",
    "    stime = time.time()\n",
    "    lr = LogisticRegression()    \n",
    "    y_pred = cross_val_predict(lr, x, y, cv=10)\n",
    "    print('Cross Validation：',metrics.accuracy_score(y, y_pred),'   time:',time.time()-stime)\n",
    "    stime = time.time()\n",
    "    lr = LogisticRegression()\n",
    "    loo = LeaveOneOut()\n",
    "    accuracy = 0;\n",
    "    for train, test in loo.split(x):\n",
    "        lr.fit(x[train], y[train])  # fitting\n",
    "        y_p = lr.predict(x[test])\n",
    "        if y_p == y[test] : accuracy += 1  \n",
    "    print('Leave One Out:',accuracy / np.shape(x)[0],'   time:',time.time()-stime)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cross Validation： 0.764705882353    time: 0.19113802909851074\n",
      "Leave One Out: 0.7700534759358288    time: 6.740789175033569\n"
     ]
    }
   ],
   "source": [
    "blood = np.loadtxt('data/blood.txt',delimiter=',')\n",
    "cal_error(blood)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cross Validation： 0.953333333333    time: 0.15811967849731445\n",
      "Leave One Out: 0.9533333333333334    time: 0.739518404006958\n"
     ]
    }
   ],
   "source": [
    "iris = np.loadtxt('data/iris.txt',delimiter=',')\n",
    "cal_error(iris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
